Initial Setup¶

Notebook settings¶

Rob's COVID Analysis¶

Data source: https://coronavirus.data.gov.uk/details/download

In [1]:
# notebook settings
# Turn off the jedi-based tab completer in IPython
# (NOTE(review): presumably a workaround for slow or broken completion -- confirm it is still needed)
%config Completer.use_jedi = False

# set up imports
import pandas as pd
import numpy as np
# `md` is used to render computed values as markdown cell output
from IPython.display import Markdown as md

# switch to DataTables view mode
# (third-party jupyter_datatables package; must be installed in the kernel's environment)
from jupyter_datatables import init_datatables_mode
init_datatables_mode()

Get Source data¶

Pull in daily case, death, and hospital admission data

In [2]:
# Get daily cases, deaths and hospital admissions from the government API.
# To analyse a different area, set `dataset_name` to another key of AREAS
# (this replaces the previous commented-out alternative read_csv calls).
API_URL = 'https://api.coronavirus.data.gov.uk/v2/data'
METRICS = ['newCasesByPublishDate', 'newDeaths28DaysByPublishDate', 'newAdmissions']

# Query parameters that identify each supported area
AREAS = {
    "UK": {'areaType': 'overview'},
    "Central Beds": {'areaType': 'utla', 'areaCode': 'E06000056'},
    "St. Albans": {'areaType': 'ltla', 'areaCode': 'E07000240'},
    "London": {'areaType': 'region', 'areaCode': 'E12000007'},
}

dataset_name = "UK"

# Build the query string: area filters first, then one `metric=` entry per
# metric, then the response format.
query = '&'.join(
    [f'{key}={value}' for key, value in AREAS[dataset_name].items()]
    + [f'metric={metric}' for metric in METRICS]
    + ['format=csv']
)
dataset = pd.read_csv(f'{API_URL}?{query}')
In [3]:
# Drop the area-identifier columns, which are not used by any plot.
# Reassigning (instead of inplace=True) with errors='ignore' keeps this cell
# safe to re-run: a second run no longer raises KeyError for columns that
# have already been dropped.
dataset = dataset.drop(columns=['areaCode', 'areaName', 'areaType'], errors='ignore')

# NOTE: 'date' is deliberately kept as a regular column, because the plots
# read it via dataset['date'] / x="date". The earlier `dataset.set_index('date')`
# call discarded its result and therefore never had any effect, so it is removed.

# Order rows oldest-first so that diff(), rolling() and tail() run forwards in time.
# Sorting by date gives the same order as reversing newest-first data, but
# does not flip the frame back again if the cell is executed twice.
# Assumes 'date' holds ISO-8601 (YYYY-MM-DD) strings, which sort chronologically.
dataset = dataset.sort_values('date')

# calculate daily new case deltas (change from the previous day)
dataset['newCases_dailyDelta'] = dataset['newCasesByPublishDate'].diff()

# the 90 most recent rows, used for the zoomed-in plots
datset_last90daysonly = dataset.tail(90)
In [4]:
# Render the date of the final row as a markdown heading
latest_date = dataset['date'].iloc[-1]
md(f"## Most recent datapoint: {latest_date}")
Out[4]:

Most recent datapoint: 2022-04-26¶

Graph the data¶

Set up Plotly

In [5]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

Plot UK daily new cases

In [6]:
# Plot raw daily new cases: the full history first, then the last 90 days.
# px.line() builds its own figure, so the `fig = go.Figure()` placeholders
# that were immediately overwritten have been removed.
for frame, title_suffix in [
    (dataset, ''),
    (datset_last90daysonly, ' (last 90 days)'),
]:
    fig = px.line(
        frame,
        x="date",
        y="newCasesByPublishDate",
        title=dataset_name + ' Daily New Cases' + title_suffix,
        labels={
            "date": "Date",
            "newCasesByPublishDate": "New cases by publish date"
        })
    fig.show()

That looks a bit scrappy - let's switch to a seven-day rolling mean average and plot it again

In [7]:
# Smooth the daily counts with a 7-day rolling mean; the first six rows are
# NaN because a full 7-row window is not yet available.
dataset['newCases_rollingSevenDayMean'] = dataset['newCasesByPublishDate'].rolling(7).mean()

# Re-take the 90-day slice so that it includes the new column
datset_last90daysonly = dataset.tail(90)

# Plot the smoothed series: full history first, then the last 90 days.
# px.line() builds its own figure, so the `fig = go.Figure()` placeholders
# that were immediately overwritten have been removed.
for frame, title_suffix in [
    (dataset, ''),
    (datset_last90daysonly, ' - last 90 days'),
]:
    fig = px.line(
        frame,
        x="date",
        y="newCases_rollingSevenDayMean",
        title=dataset_name + " Daily New Cases (7-day rolling mean average)" + title_suffix,
        labels={
            "date": "Date",
            "newCases_rollingSevenDayMean": "New cases (7 day rolling mean average)"})
    fig.show()

This looks better, but let's put them on the same graph

In [8]:
def plot_cases_with_rolling_mean(frame, title):
    """Build a chart of raw daily new cases overlaid with their 7-day rolling mean.

    Args:
        frame: DataFrame providing the 'date', 'newCasesByPublishDate' and
            'newCases_rollingSevenDayMean' columns.
        title: Title shown above the chart.

    Returns:
        The plotly figure; the caller decides when to show it.
    """
    fig = make_subplots(
        specs=[[{"secondary_y": False}]],
        x_title="Date",
        y_title="Number of cases",
    )

    fig.add_trace(go.Scatter(
        x=frame['date'],
        y=frame['newCasesByPublishDate'],
        name='Daily new cases'))

    fig.add_trace(go.Scatter(
        x=frame['date'],
        y=frame['newCases_rollingSevenDayMean'],
        name='Daily new cases (7-day rolling mean average)'))

    # Pin the legend inside the top-left corner of the plot area
    fig.update_layout(
        title=title,
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="left",
            x=0.01))
    return fig


# Full history
fig = plot_cases_with_rolling_mean(dataset, dataset_name + " daily new cases")
fig.show()

# Last 90 days only
fig = plot_cases_with_rolling_mean(
    datset_last90daysonly,
    dataset_name + " daily new cases (last 90 days)")
fig.show()

Now let's look at the daily delta in cases

In [9]:
# Plot the day-on-day change in new cases: full history first, then the last 90 days.
# px.line() builds its own figure, so the `fig = go.Figure()` placeholders
# that were immediately overwritten have been removed. Axis labels are added
# so these charts match the other px.line plots in this notebook.
for frame, title_suffix in [
    (dataset, ''),
    (datset_last90daysonly, ' - last 90 days only'),
]:
    fig = px.line(
        frame,
        x="date",
        y="newCases_dailyDelta",
        title=dataset_name + ' Daily New Cases - change from previous day' + title_suffix,
        labels={
            "date": "Date",
            "newCases_dailyDelta": "Change in new cases from previous day"
        })
    fig.show()

Again - a bit scrappy - let's get a rolling seven day mean average and plot them together

In [10]:
# Smooth the day-on-day change with a 7-day rolling mean
dataset['newCases_dailyDelta_rollingSevenDayMean'] = dataset['newCases_dailyDelta'].rolling(7).mean()

# Re-take the 90-day slice so that it includes the new column
datset_last90daysonly = dataset.tail(90)


def plot_delta_with_rolling_mean(frame, title):
    """Build a chart of the daily change in cases overlaid with its 7-day rolling mean.

    Args:
        frame: DataFrame providing the 'date', 'newCases_dailyDelta' and
            'newCases_dailyDelta_rollingSevenDayMean' columns.
        title: Title shown above the chart.

    Returns:
        The plotly figure; the caller decides when to show it.
    """
    fig = make_subplots(
        specs=[[{"secondary_y": False}]],
        x_title="Date",
        y_title="Daily delta in number of cases",
    )

    fig.add_trace(go.Scatter(
        x=frame['date'],
        y=frame['newCases_dailyDelta'],
        name='Change from previous day'))

    fig.add_trace(go.Scatter(
        x=frame['date'],
        y=frame['newCases_dailyDelta_rollingSevenDayMean'],
        name='7-day rolling average change'))

    # Pin the legend inside the top-left corner of the plot area
    fig.update_layout(
        title=title,
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="left",
            x=0.01))
    return fig


# Full history
fig = plot_delta_with_rolling_mean(
    dataset,
    dataset_name + " Daily New Cases - change from previous day")
fig.show()

# Last 90 days only
fig = plot_delta_with_rolling_mean(
    datset_last90daysonly,
    dataset_name + " Daily New Cases - change from previous day - last 90 days only")
fig.show()
In [11]:
def plot_cases_deaths_admissions(frame, title):
    """Build a dual-axis chart of new cases, deaths and hospital admissions.

    Cases are drawn against the left-hand y axis; deaths and admissions share
    the right-hand (secondary) y axis.

    Args:
        frame: DataFrame providing the 'date', 'newCasesByPublishDate',
            'newDeaths28DaysByPublishDate' and 'newAdmissions' columns.
        title: Title shown above the chart.

    Returns:
        The plotly figure; the caller decides when to show it.
    """
    fig = make_subplots(
        specs=[[{"secondary_y": True}]],
        x_title="Date"
    )

    # No secondary_y argument: this trace goes on the primary (left) axis
    fig.add_trace(go.Scatter(
        x=frame['date'],
        y=frame['newCasesByPublishDate'],
        name='New cases (left-hand scale)'))

    fig.add_trace(go.Scatter(
        x=frame['date'],
        y=frame['newDeaths28DaysByPublishDate'],
        name='New deaths (right-hand scale)'),
        secondary_y=True)

    fig.add_trace(go.Scatter(
        x=frame['date'],
        y=frame['newAdmissions'],
        name='New hospital admissions (right-hand scale)'),
        secondary_y=True)

    # Pin the legend inside the top-left corner of the plot area
    fig.update_layout(
        title=title,
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="left",
            x=0.01))

    fig.update_yaxes(title_text="New cases", secondary_y=False)
    fig.update_yaxes(title_text="New deaths/hospital admissions", secondary_y=True)
    return fig


# Full history
fig = plot_cases_deaths_admissions(
    dataset,
    dataset_name + " daily new cases, deaths, and hospital admissions")
fig.show()

# Last 90 days only
fig = plot_cases_deaths_admissions(
    datset_last90daysonly,
    dataset_name + " daily new cases, deaths, and hospital admissions - last 90 days only")
fig.show()

Experimental Area¶

Alternate visualisations¶

In [12]:
# Stacked view: daily new cases (top row) and daily deaths (bottom row)
# sharing a single date axis.
# Changes from the previous version:
#   * the deaths legend entry was dataset_name + 'UK Daily Deaths', which
#     rendered as e.g. "UKUK Daily Deaths"; it now mirrors the cases entry
#   * the shared y_title "daily deaths" also labelled the cases row, so each
#     row now gets its own y-axis title
#   * the unused `fig = go.Figure()` placeholder is removed
#   * add_trace replaces the deprecated append_trace
fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    x_title="date")

fig.add_trace(
    go.Scatter(
        x=dataset['date'],
        y=dataset['newCasesByPublishDate'],
        mode='lines',
        name=dataset_name + ' Daily New Cases'),
    row=1, col=1)

fig.add_trace(
    go.Scatter(
        x=dataset['date'],
        y=dataset['newDeaths28DaysByPublishDate'],
        mode='lines',
        name=dataset_name + ' Daily Deaths'),
    row=2, col=1)

fig.update_yaxes(title_text="daily new cases", row=1, col=1)
fig.update_yaxes(title_text="daily deaths", row=2, col=1)

fig.show()